********************************************************************************
* On the Optimal Design of Transfers and Income-Tax Progressivity
* Data: CPS ASEC
********************************************************************************

* Start from a clean session: no data, matrices, Mata objects or macros left over from earlier runs
clear
clear matrix
clear mata
macro drop _all
set more off
set maxvar 10000

* Folder for figures, located below the directory from which this do-file is started.
* A forward slash is used as separator because Stata accepts it on Windows, macOS and Linux alike.
global figure_path "`c(pwd)'/Outputs"

********************************************************************************
* Choose version: version 1 is benchmark; other versions 2-9 are robustness
********************************************************************************

* Run with include (not do) so that local macros set in choose_version.do stay visible in this file.
* NOTE(review): the local version_robust used further below is presumably defined there - confirm.
include "choose_version.do"

********************************************************************************
* Prepare original CPS data
********************************************************************************

* Build the Census extract, then empty the data in memory before the next step
do "extract_cps_census"
clear

********************************************************************************
* Prepare CBO imputed data (1980-2020)
********************************************************************************

* Downloaded from https://github.com/US-CBO/means_tested_transfer_imputations
* Last downloaded on December 8, 2022
* Remember the project root before calling the CBO code: the relative cd that used to follow
* this call implies that the CBO script leaves the working directory somewhere else.
local project_root "`c(pwd)'"
do "Datasets/CBO_Imputation/source_code/impute_means_tested_transfers_MAIN"

********************************************************************************
* Load IPUMS CPS data extract
********************************************************************************

* Go back to the project root explicitly instead of relying on a fixed number of parent
* directories, so the script keeps working if the CBO code changes where it ends up.
cd "`project_root'"

* IPUMS data extract
use "Datasets/IPUMS/cps_00008.dta", clear

* Keep only year 2013
keep if year == 2013

* Generate variable for sequential merge with original CPS from Census
* (row position in the current sort order of the IPUMS extract)
gen obs_n = _n

********************************************************************************
* Merge IPUMS with original CPS from Census
********************************************************************************

* Sequential merge of IPUMS CPS with Census CPS (see https://blog.popdata.org/mergecpsfile/ for information on sequential merge)
* assert(match) stops with an error if any row of either file has no partner; the row-by-row
* checks below already presuppose a complete match, so this only makes a failure explicit.
merge 1:1 obs_n using "Datasets/Census/asec_2013_orig.dta", assert(match) nogenerate

* Check sequential merge based on individual characteristics
assert age == age_orig
assert sex == sex_orig
assert hseq == h_seq

* Translate the Census race codes 1-26 into the codes used by the IPUMS variable race.
* The k-th entry of the list is the IPUMS code assigned to Census code k.
* No IPUMS code lies in 1-26, so an already translated value is never translated again.
local ipums_race_codes 100 200 300 651 652 801 802 803 804 805 806 807 808 815 809 810 811 816 812 817 813 818 814 819 820 830
forvalues census_code = 1/26 {
	local ipums_code : word `census_code' of `ipums_race_codes'
	replace race_orig = `ipums_code' if race_orig == `census_code'
}
assert race == race_orig

* Translate the Census marital status codes into the codes used by the IPUMS variable marst.
* All conditions refer to an untouched copy of the Census codes, so the mapping is applied
* simultaneously and needs no placeholder values:
* 1,2 -> 1   3 -> 2   4 -> 5   5 -> 4   6 -> 3   7 -> 6
tempvar census_marital
gen `census_marital' = marital_orig
replace marital_orig = 1 if `census_marital' <= 2
replace marital_orig = 2 if `census_marital' == 3
replace marital_orig = 5 if `census_marital' == 4
replace marital_orig = 4 if `census_marital' == 5
replace marital_orig = 3 if `census_marital' == 6
replace marital_orig = 6 if `census_marital' == 7
drop `census_marital'
assert marst == marital_orig

* The Census copies were only needed for the checks above
drop age_orig sex_orig race_orig marital_orig

********************************************************************************
* Merge with CBO imputation
********************************************************************************

* Attach the CBO imputed transfers by household sequence number and person position.
* nogenerate skips the creation of _merge, which was dropped right away before.
* NOTE(review): unmatched rows from either file are kept silently; if every person is
* expected to have a CBO record, consider adding assert(match) - confirm against the data.
merge 1:1 h_seq pppos using "Datasets/CBO_Imputation/outputs/data/CBO_imputed_means_tested_transfers_2013.dta", nogenerate

********************************************************************************
* Data preparation: Drop variables and prepare weights
********************************************************************************

* Drop not needed variables: all observations are from ASEC
drop month asecflag

* Round weights (https://forum.ipums.org/t/asec-national-level-frequencies-with-stata-fweight-or-pweight/3209)
* asecwth and asecwt are each replaced by their value rounded to the nearest integer
foreach weight_var of varlist asecwth asecwt {
	replace `weight_var' = round(`weight_var')
}

********************************************************************************
* Data preparation
********************************************************************************

* Run the data preparation scripts one after the other, in this order:
*   inc   - income variables
*   demo  - demographic variables
*   tax   - taxes/credits from the CPS
*   cbo   - transfers from the CBO
*   hours - hours
foreach prep_step in inc demo tax cbo hours {
	do data_prep_`prep_step'.do
}

********************************************************************************
* Collapse to household level and select sample
********************************************************************************

* Keep only head of household, i.e. the rows with serial_id equal to 1
* (serial_id is not created in this file; rows with a missing serial_id are removed as well)
keep if serial_id == 1

* Add imputed employer part of payroll taxes to income
* (labor and total household income; skipped in robustness version 8)
if `version_robust' != 8 {
	foreach income_var of varlist inc_lab_hh inc_tot_hh {
		replace `income_var' = `income_var' + fica_employer_hh
	}
}

* Total tax and transfer amounts before sample selection
do analyze_totals_pre.do

* Sample selection
* (1) Keep only households with heads between 25 and 60 (a missing age is dropped as well)
keep if inrange(age, 25, 60)
* (2) Keep only households with labor and total income of at least 5000
drop if inc_lab_hh < 5000 | inc_tot_hh < 5000

* (3) Keep only households whose taxes net of transfers do not exceed either labor or total income
* Transfers: common components, plus extra components in robustness versions 2 and 6
local transfer_list snap_impute_val_hh housing_assist_impute_val_hh incwelfr_hh credits_hh state_credits_hh
if `version_robust' == 2 {
	local transfer_list `transfer_list' incunemp_hh
}
else if `version_robust' == 6 {
	local transfer_list `transfer_list' mcaid_impute_val_hh ssi_impute_val_hh
}
egen transfers_aux_hh = rowtotal(`transfer_list'), missing

* Taxes: divtax_hh enters every sum with a negative sign, except in version 9 where it is left out.
* NOTE(review): divtax_hh_neg is not dropped at the end of this section - confirm whether later scripts use it.
gen divtax_hh_neg = - divtax_hh
* Broadest tax definition; used for version 4 and again for check (4) below
local broad_tax_list fedtax_hh ctccrd_hh fica_hh fedretir_hh fica_employer_hh statetax_hh divtax_hh_neg
if `version_robust' == 4 {
	local tax_list `broad_tax_list'
}
else if `version_robust' == 5 {
	local tax_list fedtax_hh fica_hh fedretir_hh fica_employer_hh stataxac_pos_hh divtax_hh_neg
}
else if `version_robust' == 8 {
	* version 8 leaves out the employer part of payroll taxes, as it does for income above
	local tax_list fedtaxac_pos_hh fica_hh fedretir_hh stataxac_pos_hh divtax_hh_neg
}
else if `version_robust' == 9 {
	local tax_list fedtaxac_pos_hh fica_hh fedretir_hh fica_employer_hh stataxac_pos_hh
}
else {
	local tax_list fedtaxac_pos_hh fica_hh fedretir_hh fica_employer_hh stataxac_pos_hh divtax_hh_neg
}
egen taxes_aux_hh = rowtotal(`tax_list'), missing

* With the missing option a row total is missing when all of its components are missing.
* Stata ranks missing above every number, so a missing net amount satisfies >= and the
* household is dropped here. NOTE(review): confirm that this is intended.
gen taxes_transfers_aux_hh = taxes_aux_hh - transfers_aux_hh
* potentially large disregarded sources of income (in few cases)
drop if taxes_transfers_aux_hh >= inc_tot_hh | taxes_transfers_aux_hh >= inc_lab_hh

* (4) Keep only households whose taxes (in the broadest definition: BC plus adding back the CTC) do not exceed labor income
egen taxes_aux_hh_2 = rowtotal(`broad_tax_list'), missing
drop if taxes_aux_hh_2 >= inc_lab_hh

* Remove the auxiliary variables created for the selection
drop transfers_aux_hh taxes_aux_hh taxes_transfers_aux_hh taxes_aux_hh_2

* Compute some totals after sample selection
do analyze_totals_post.do

********************************************************************************
* Analyses
********************************************************************************

* Run the analysis scripts one after the other, in this order:
*   analyze_distributions  - income distributions
*   analyze_comb_programs  - combined tax-and-transfer programs
*   estimate_tax_fcts      - estimate tax functions
*   analyze_hours          - hours
foreach analysis_script in analyze_distributions analyze_comb_programs estimate_tax_fcts analyze_hours {
	do `analysis_script'.do
}









